####################################################
# EXTRACTS KEY VARIABLES FROM ORIGINAL CENSUS DATA
####################################################
# based on code written by Christoph Nolte
# questions to bowydenbraber@gmail.com

# Clear R's brain
#rm(list=ls())

library(xlsx)
library(foreign)
library(plyr)
library(readxl)

# States included in this analysis (uf = "unidade federativa", i.e. state).
# Each two-letter code is used below both as the sub-directory name and as the
# file-name suffix of the per-state census workbooks ("<uf>/<doc>_<uf>.xls").
ufs <- c("AC", "AM", "AP", "MA", "MT", "PA", "RO", "RR", "TO")


# CENSUS 2000

# Read meta data for key variables (names, document location, description).
# Columns used below: `doc` (workbook name), `var` (column name inside that
# workbook) and `new` (short output name; rows with an empty `new` are skipped).

file_bnd <- "BraCens2000 - Key Variables.csv"

# NOTE(review): machine-specific absolute path; the 2010 section reads its meta
# data from a different user directory -- confirm which location is current.
v <- read.csv(file.path("C:/Users/vmw287/Documents/Not copenhagen/Sheffield_Brazil/BraCens", file_bnd), sep=",", stringsAsFactors=FALSE)
docs <- unique(v$doc)

# Read data from XLS files ("docs").
# For every workbook type: read the first sheet of each state's file (paths are
# relative to the working directory), keep the sector code plus the requested
# variables, convert decimal commas to numeric, stack all states, rename the
# columns to their short names and outer-join the result onto `d` by `code`.
d <- NULL  # start clean so a stale `d` from an earlier run is never reused
for (i in seq_along(docs)) {
  doc <- docs[i]
  keep <- v$doc == doc & nchar(v$new) > 0
  vars <- v$var[keep]
  vars.new <- v$new[keep]
  # Collect one data frame per state and bind once, instead of growing `cols`
  # with rbind() on every iteration.
  per.uf <- lapply(ufs, function(uf) {
    xls <- read.xlsx(paste0(uf, "/", doc, "_", uf, ".xls"), sheetIndex = 1)
    xls <- xls[, c("Cod_setor", vars), drop = FALSE]
    # Values may use a decimal comma; anything non-numeric becomes NA.
    for (vr in vars) xls[, vr] <- as.numeric(sub(",", ".", xls[, vr]))
    xls
  })
  cols <- do.call(rbind, per.uf)
  colnames(cols) <- c("code", vars.new)
  rownames(cols) <- cols$code
  d <- if (is.null(d)) cols else merge(d, cols, by = "code", all = TRUE)
}

# Aggregate summary indicators.
# Each derived column is the row-wise sum of its component columns; rowSums()
# is called without na.rm, so a row with any missing component yields NA.
poor.sets <- list(
  # poor household heads
  hhh.poor   = c("hhh.inc.no", "hhh.inc.half"),
  # poverty in water supply (ag = "agua")
  hh.ag.poor = c("hh.ag.poco", "hh.ag.chuv.outro"),
  # poverty in sanitation (bs = "banheiro/sanitario")
  hh.bs.poor = c("hh.bs.vala", "hh.bs.rio", "hh.bs.outro", "hh.bs.no")
)
for (nm in names(poor.sets)) {
  d[[nm]] <- rowSums(d[, poor.sets[[nm]]])
}

# Write key variables (CSV without row names, plus a DBF copy)
write.csv(x = d, file = "BraCens2000 - Key Data.csv", row.names = FALSE)
write.dbf(dataframe = d, file = "BraCens2000 - Key Data.dbf")


# CENSUS 2010

# Read meta data for key variables (names, document location, description).
# Columns used below: `doc` (workbook name), `var` (column name inside that
# workbook) and `new` (short output name; rows with an empty `new` are skipped).
# NOTE(review): machine-specific absolute path, and a different user directory
# than the one used for the 2000 meta data -- confirm which location is current.
v <- read.csv("C:/Users/chrnolte/Documents/Code/R/BraCens/BraCens2010 - Key Variables.csv", sep=",", stringsAsFactors=FALSE)
docs <- unique(v$doc)

# Read data from XLS files ("docs").
# For every workbook type: read the first sheet of each state's file (paths are
# relative to the working directory), keep the sector code plus the requested
# variables, convert decimal commas to numeric, stack all states, rename the
# columns to their short names and outer-join the result onto `d` by `code`.
d <- NULL
for (i in seq_along(docs)) {
  doc <- docs[i]
  keep <- v$doc == doc & nchar(v$new) > 0
  vars <- v$var[keep]
  vars.new <- v$new[keep]
  # Collect one data frame per state and bind once, instead of growing `cols`
  # with rbind() on every iteration.
  per.uf <- lapply(ufs, function(uf) {
    # read.xls() belongs to the gdata package, which this script never loads;
    # use read.xlsx() from the already-attached xlsx package, as the 2000
    # section does.
    xls <- read.xlsx(paste0(uf, "/", doc, "_", uf, ".xls"), sheetIndex = 1,
                     stringsAsFactors = FALSE)
    xls <- xls[, c("Cod_setor", vars), drop = FALSE]
    # Values may use a decimal comma; anything non-numeric becomes NA.
    for (vr in vars) xls[, vr] <- as.numeric(sub(",", ".", xls[, vr]))
    xls
  })
  cols <- do.call(rbind, per.uf)
  colnames(cols) <- c("code", vars.new)
  rownames(cols) <- cols$code
  d <- if (is.null(d)) cols else merge(d, cols, by = "code", all = TRUE)
}

# Compute aggregate indicators (same scheme as for the 2000 census).
# Reference counts: a missing `hhh` zeroes both reference columns; a missing
# `hh` additionally zeroes `hh.REF`.
hhh.missing <- is.na(d$hhh)
d$hh.REF <- ifelse(hhh.missing | is.na(d$hh), 0, d$hh)
d$hhh.REF <- ifelse(hhh.missing, 0, d$hhh)

# Poverty indicators: row-wise sums of the component columns. rowSums() is
# called without na.rm, so a row with any missing component yields NA.
poor.sets <- list(
  hhh.poor   = c("hhh.inc.no", "hhh.inc.half"),
  hh.ag.poor = c("hh.ag.poco", "hh.ag.chuv", "hh.ag.outro"),
  hh.bs.poor = c("hh.bs.vala", "hh.bs.rio", "hh.bs.outro", "hh.bs.no")
)
for (nm in names(poor.sets)) {
  d[[nm]] <- rowSums(d[, poor.sets[[nm]]])
}

# Export the combined table (CSV without row names, plus a DBF copy)
write.csv(x = d, file = "BraCens2010 - Key Data.csv", row.names = FALSE)
write.dbf(dataframe = d, file = "BraCens2010 - Key Data.dbf")





